# Replication Archive for: 
# Coppock, Alexander and Donald P. Green. 2020. 
# "Do Belief Systems Exhibit Dynamic Constraint?" 
# The Journal of Politics, Forthcoming.


# NOTE: the original script started with `rm(list = ls())`. It was removed
# because wiping the caller's workspace is a surprising side effect when this
# file is sourced from another script; every object used below is created
# in this file.

library(tidyverse)
library(lubridate)

# Read the raw study 3 export.
dat <- read_csv(file = "data/raw/study_3_lucid_raw.csv")

# Keep only rows whose Status is "IP Address", that carry a non-missing
# respondent id (`rid`), and whose consent response is affirmative.
dat <-
  dat %>%
  filter(Status == "IP Address",
         !is.na(rid),
         consent == "I agree to participate")


# Cleaned data from the original study; it is read here and used later in
# this script to standardize the replication outcome scales.
original <- read_rds("data/clean/original_trump_white_clean.rds")

# administrative recodes --------------------------------------------------

# Copy the survey metadata columns into `admin_`-prefixed columns with
# explicit types (date-times via ymd_hms(), numerics via as.numeric()).
dat <-
  dat %>%
  mutate(
    admin_ResponseId = ResponseId,
    admin_StartDate = ymd_hms(StartDate),
    admin_EndDate = ymd_hms(EndDate),
    admin_RecordedDate = ymd_hms(RecordedDate),
    # Elapsed time between start and end as a difftime in seconds.
    # The original assigned this column twice: first from the
    # `Duration (in seconds)` column, then from difftime(); only the second
    # assignment survived, so the dead first one has been dropped. The
    # difference is now taken on the parsed admin_* date-times (ymd_hms()
    # defaults to UTC) rather than on the raw columns, so the result does not
    # depend on the machine's local time zone, and `units` is spelled out
    # instead of relying on partial argument matching of `unit`.
    admin_duration_seconds =
      difftime(admin_EndDate, admin_StartDate, units = "secs"),
    admin_latitude = as.numeric(LocationLatitude),
    admin_longitude = as.numeric(LocationLongitude),
    admin_finished = as.numeric(Finished)
  )

# Demographic Recodes -----------------------------------------------------

# Derive the `dem_` variables from the survey's own demographic questions.
dat <- mutate(
  dat,
  dem_birthyr = as.numeric(dem_birthyr),
  # Age is computed against the hard-coded reference year 2020.
  dem_age = 2020 - dem_birthyr,
  # 1 when the gender response is exactly "Female", 0 for any other
  # non-missing response.
  dem_female = as.numeric(dem_gender == "Female"),
  # Responses containing the text "I was born" are labelled as US-born;
  # all other non-missing responses are labelled as born elsewhere.
  dem_birthplace = ifelse(
    test = str_detect(string = wuhuber_generation, pattern = "I was born"),
    yes = "The United States",
    no = "Another country"
  )
)
# Lucid demographic recodes -----------------------------------------------

# Translate the numeric panel-supplied demographic codes (age, education,
# ethnicity, gender, hhi, hispanic, political_party, region, zip) into
# labelled `lucid_` variables plus a few collapsed analysis variables
# (age_5, educ_5, race_4, pid_7, female).
dat <-
  dat %>%
  mutate(
    lucid_age = as.numeric(age),

    # Five age brackets; ages below 18 or missing stay NA.
    # Fix: the 50-59 bracket was previously mislabelled "50-99".
    age_5 =
      case_when(
        18 <= lucid_age & lucid_age <= 29 ~ "18-29",
        30 <= lucid_age & lucid_age <= 39 ~ "30-39",
        40 <= lucid_age & lucid_age <= 49 ~ "40-49",
        50 <= lucid_age & lucid_age <= 59 ~ "50-59",
        60 <= lucid_age ~ "60+"
      ),

    lucid_education =
      case_when(
        education == 1 ~ "Some high school or less",
        education == 2 ~ "High school graduate",
        education == 3 ~ "Other post high school vocational training",
        education == 4 ~ "Completed some college, but no degree",
        education == 5 ~ "Associate's degree",
        education == 6 ~ "Bachelor's degree",
        education == 7 ~ "Master's or professional degree",
        education == 8 ~ "Doctorate degree",
        education == -3105 ~ "None of the above"
      ),

    # Collapsed education; code -3105 ("None of the above") is grouped with
    # the lowest category.
    educ_5 =
      case_when(
        education %in% c(1, -3105) ~ "Less than High School",
        education %in% c(2, 3) ~ "High School",
        education %in% c(4, 5) ~ "Some College",
        education %in% c(6) ~ "College",
        education %in% c(7, 8) ~ "Graduate School"
      ),

    educ_5 = factor(
      educ_5,
      levels = c(
        "Less than High School",
        "High School",
        "Some College",
        "College",
        "Graduate School"
      )
    ),

    lucid_ethnicity =
      case_when(
        ethnicity == 1 ~ "White",
        ethnicity == 2 ~ "Black, or African American",
        ethnicity == 3 ~ "American Indian or Alaska Native",
        ethnicity == 4 ~ "Asian *** Asian Indian",
        ethnicity == 5 ~ "Asian *** Chinese",
        ethnicity == 6 ~ "Asian *** Filipino",
        ethnicity == 7 ~ "Asian *** Japanese",
        ethnicity == 8 ~ "Asian *** Korean",
        ethnicity == 9 ~ "Asian *** Vietnamese",
        ethnicity == 10 ~ "Asian *** Other",
        ethnicity == 11 ~ "Pacific Islander *** Native Hawaiian",
        ethnicity == 12 ~ "Pacific Islander *** Guamanian",
        ethnicity == 13 ~ "Pacific Islander *** Samoan",
        ethnicity == 14 ~ "Pacific Islander *** Other Pacific Islander",
        ethnicity == 15 ~ "Some other race",
        ethnicity == 16 ~ "Prefer not to answer"
      ),

    # NOTE(review): any gender code other than 1 or 2 (including missing)
    # falls through to "Male", so lucid_female / female are never NA.
    # Left unchanged; confirm this default is intended.
    lucid_gender =
      case_when(
        gender == 1 ~ "Male",
        gender == 2 ~ "Female",
        TRUE ~ "Male"
      ),

    lucid_female = as.numeric(lucid_gender == "Female"),

    female = lucid_female,

    lucid_hhi =
      case_when(
        hhi == 1 ~ "Less than $14,999",
        hhi == 2 ~ "$15,000 to $19,999",
        hhi == 3 ~ "$20,000 to $24,999",
        hhi == 4 ~ "$25,000 to $29,999",
        hhi == 5 ~ "$30,000 to $34,999",
        hhi == 6 ~ "$35,000 to $39,999",
        hhi == 7 ~ "$40,000 to $44,999",
        hhi == 8 ~ "$45,000 to $49,999",
        hhi == 9 ~ "$50,000 to $54,999",
        hhi == 10 ~ "$55,000 to $59,999",
        hhi == 11 ~ "$60,000 to $64,999",
        hhi == 12 ~ "$65,000 to $69,999",
        hhi == 13 ~ "$70,000 to $74,999",
        hhi == 14 ~ "$75,000 to $79,999",
        hhi == 15 ~ "$80,000 to $84,999",
        hhi == 16 ~ "$85,000 to $89,999",
        hhi == 17 ~ "$90,000 to $94,999",
        hhi == 18 ~ "$95,000 to $99,999",
        hhi == 19 ~ "$100,000 to $124,999",
        hhi == 20 ~ "$125,000 to $149,999",
        hhi == 21 ~ "$150,000 to $174,999",
        hhi == 22 ~ "$175,000 to $199,999",
        hhi == 23 ~ "$200,000 to $249,999",
        hhi == 24 ~ "$250,000 and above",
        hhi == -3105 ~ "Prefer not to answer"
      ),

    lucid_hispanic_full =
      case_when(
        hispanic == 1 ~ "No , not of Hispanic, Latino, or Spanish origin",
        hispanic == 2 ~ "Yes, Mexican, Mexican American, Chicano",
        hispanic == 3 ~ "Yes, Cuban",
        hispanic == 4 ~ "Yes, another Hispanic, Latino, or Spanish origin *** Argentina ",
        hispanic == 5 ~ "Yes, another Hispanic, Latino, or Spanish origin *** Colombia ",
        hispanic == 6 ~ "Yes, another Hispanic, Latino, or Spanish origin *** Ecuador ",
        hispanic == 7 ~ "Yes, another Hispanic, Latino, or Spanish origin *** El Salvadore ",
        hispanic == 8 ~ "Yes, another Hispanic, Latino, or Spanish origin *** Guatemala ",
        hispanic == 9 ~ "Yes, another Hispanic, Latino, or Spanish origin *** Nicaragua ",
        hispanic == 10 ~ "Yes, another Hispanic, Latino, or Spanish origin *** Panama ",
        hispanic == 11 ~ "Yes, another Hispanic, Latino, or Spanish origin *** Peru ",
        hispanic == 12 ~ "Yes, another Hispanic, Latino, or Spanish origin *** Spain ",
        hispanic == 13 ~ "Yes, another Hispanic, Latino, or Spanish origin *** Venezuela ",
        hispanic == 14 ~ "Yes, another Hispanic, Latino, or Spanish origin *** Other Country",
        hispanic == 15 ~ "Prefer not to answer"
      ),

    # NOTE(review): every code other than 1 counts as Hispanic here, which
    # includes code 15 ("Prefer not to answer"). Left unchanged; confirm.
    lucid_hispanic = as.numeric(hispanic != 1),

    # Hispanic takes precedence over ethnicity; a missing lucid_hispanic
    # does not match the first branch and falls through to the ethnicity
    # branches.
    race_4 =
      case_when(
        lucid_hispanic == 1 ~ "Hispanic",
        ethnicity == 1 ~ "White",
        ethnicity == 2 ~ "Black",
        TRUE ~ "Other"
      ),

    # Fold the ten political_party codes onto a seven-point scale. The
    # trailing labels are the original author's description of each code.
    lucid_pid_7n =
      case_when(
        political_party == 1 ~ 1, # Strong D
        political_party == 2 ~ 2, # Not v. strong D
        political_party == 3 ~ 3, # Independent D
        political_party == 4 ~ 4, # Pure independent
        political_party == 5 ~ 5, # Independent R
        political_party == 6 ~ 3, # Lean Democrat
        political_party == 7 ~ 4, # Independent
        political_party == 8 ~ 5, # Lean Republican
        political_party == 9 ~ 6, # Not v. strong R
        political_party == 10 ~ 7 # Strong R
      ),

    lucid_pid_7 =
      case_when(
        lucid_pid_7n == 1 ~ "Strong Democrat",
        lucid_pid_7n == 2 ~ "Weak Democrat",
        lucid_pid_7n == 3 ~ "Lean Democrat",
        lucid_pid_7n == 4 ~ "Independent",
        lucid_pid_7n == 5 ~ "Lean Republican",
        lucid_pid_7n == 6 ~ "Weak Republican",
        lucid_pid_7n == 7 ~ "Strong Republican"
      ),

    pid_7 = lucid_pid_7n,

    lucid_pid_7_factor =
      factor(
        lucid_pid_7,
        levels = c(
          "Strong Democrat",
          "Weak Democrat",
          "Lean Democrat",
          "Independent",
          "Lean Republican",
          "Weak Republican",
          "Strong Republican"
        )
      ),

    # Three-way party id: leaners are grouped with partisans.
    lucid_pid_3 = case_when(
      lucid_pid_7n < 4 ~ "Democrat",
      lucid_pid_7n == 4 ~ "Independent",
      lucid_pid_7n > 4 ~ "Republican"
    ),

    lucid_region =
      case_when(
        region == 1 ~ "Northeast",
        region == 2 ~ "Midwest",
        region == 3 ~ "South",
        region == 4 ~ "West"
      ),

    lucid_zip = zip
  )

# politics recodes --------------------------------------------------------

# Give the ideology item the `pol_` prefix used by the other cleaned columns.
dat <- rename(dat, pol_ideo_7 = pol_ideo7)

# pol_ideo_7n: the seven ideology labels as numbers 1-7 in the order listed.
# pol_ideo_3: 1-3, 4 and 5-7 collapsed into three groups.
dat <- mutate(
  dat,
  pol_ideo_7n = dplyr::recode(
    pol_ideo_7,
    "Extremely Liberal" = 1,
    "Liberal" = 2,
    "Slightly Liberal" = 3,
    "Moderate, Middle of Road" = 4,
    "Slightly Conservative" = 5,
    "Conservative" = 6,
    "Extremely Conservative" = 7
  ),
  pol_ideo_3 = case_when(
    pol_ideo_7n %in% c(1, 2, 3) ~ "Liberal",
    pol_ideo_7n == 4 ~ "Moderate",
    pol_ideo_7n %in% c(5, 6, 7) ~ "Conservative"
  )
)

# Reverse-code a numeric item: returns `flip - X`, e.g. flip = 10 maps a
# 1-9 item onto 9-1. With the default flip = NA the result is all NA.
tw_flip <- function(X, flip = NA) {
  negated <- X * -1
  negated + flip
}

# Convert a nine-point agreement item to numeric. The three text anchors are
# replaced by "1", "5" and "9"; every other value is passed through unchanged
# and then coerced with as.numeric().
tw_9point <- function(X) {
  relabelled <- case_when(
    X == "Strongly agree" ~ "9",
    X == "Neither agree nor disagree" ~ "5",
    X == "Strongly disagree" ~ "1",
    TRUE ~ X
  )
  as.numeric(relabelled)
}

# Convert a three-category item to numeric: "A Great Deal" = 1,
# "Only Some" = 2, "Hardly Any" = 3; any other value (or NA) becomes NA.
tw_3point <- function(X) {
  ordered_labels <- c("A Great Deal", "Only Some", "Hardly Any")
  # The position of each response in `ordered_labels` is its score.
  as.numeric(match(X, ordered_labels))
}

# Build the treatment factor, two 0/1 indicators from the `trump white_*`
# items, and three summed scales. Items wrapped in tw_flip(..., flip = 10)
# are reverse-coded on the 1-9 scale before being added.
dat <- mutate(
  dat,
  tw_Z = factor(
    trumpwhite_Z,
    levels = c("pure_control", "control", "treatment")
  ),

  # 1 when the response equals the string shown, 0 otherwise.
  tw_manip_1 = as.numeric(`trump white_inequal` == "Correct"),
  tw_manip_2 = as.numeric(`trump white_income` == "Incorrect"),

  # Eight Q423 items; items 3 and 7 are reverse-coded.
  tw_scale_1 =
    tw_9point(Q423_1) +
    tw_9point(Q423_2) +
    tw_flip(tw_9point(Q423_3), flip = 10) +
    tw_9point(Q423_4) +
    tw_9point(Q423_5) +
    tw_9point(Q423_6) +
    tw_flip(tw_9point(Q423_7), flip = 10) +
    tw_9point(Q423_8),

  # Six Q430 items on the three-point coding.
  tw_scale_2 =
    tw_3point(Q430_1) +
    tw_3point(Q430_2) +
    tw_3point(Q430_3) +
    tw_3point(Q430_4) +
    tw_3point(Q430_5) +
    tw_3point(Q430_6),

  # Seven Q437 items followed by eight Q438 items.
  tw_scale_3 =
    tw_9point(Q437_1) +
    tw_9point(Q437_2) +
    tw_9point(Q437_3) +
    tw_flip(tw_9point(Q437_4), flip = 10) +
    tw_9point(Q437_5) +
    tw_flip(tw_9point(Q437_6), flip = 10) +
    tw_9point(Q437_7) +
    tw_flip(tw_9point(Q438_1), flip = 10) +
    tw_flip(tw_9point(Q438_2), flip = 10) +
    tw_9point(Q438_3) +
    tw_9point(Q438_4) +
    tw_flip(tw_9point(Q438_5), flip = 10) +
    tw_9point(Q438_6) +
    tw_flip(tw_9point(Q438_7), flip = 10) +
    tw_9point(Q438_8)
)


# Standardize tw_scale_1 and tw_scale_3 against the control group of the
# original study: subtract that group's mean and divide by its standard
# deviation (missing values ignored). local() keeps the helper vectors out of
# the workspace.
dat <- local({
  in_original_control <- original$tw_Z == "control"
  ref_scale_1 <- original$tw_scale_1[in_original_control]
  ref_scale_3 <- original$tw_scale_3[in_original_control]

  mutate(
    dat,
    SJ_s =
      (tw_scale_1 - mean(ref_scale_1, na.rm = TRUE)) /
      sd(ref_scale_1, na.rm = TRUE),
    ESJ_s =
      (tw_scale_3 - mean(ref_scale_3, na.rm = TRUE)) /
      sd(ref_scale_3, na.rm = TRUE)
  )
})




# select cleaned columns and output ---------------------------------------

# Keep the prefixed recode families (dropping any of those columns whose name
# contains "timer"), then the collapsed covariates and standardized outcomes.
# The order of the selection terms is kept as-is because it determines both
# the column order and which columns the negative selection applies to.
dat_cleaned <- select(
  dat,
  starts_with("admin_"),
  starts_with("dem_"),
  starts_with("lucid_"),
  starts_with("pol_"),
  starts_with("tw_"),
  -contains("timer"),
  age_5,
  educ_5,
  race_4,
  pid_7,
  female,
  SJ_s,
  ESJ_s
)

# Save the cleaned replication data.
write_rds(dat_cleaned, "data/clean/replication_trump_white_clean.rds")
